/* diff - a file comparison program
   Copyright (C) 1986 Free Software Foundation

   This will be free software eventually, but not until it is finished.

   @@ replace with the latest copyright/temms notice from rms when
   diff is ready for release.

   The basic algorithm is described in: "A File Comparison Program",
   Webb Miller and Eugene W. Myers, Software-Practice and Experience,
   Volume 15, Number 11, November 1985, pages 1025-1039.

   Created.  Basic algorithm and functionality in place.
	- tower@prep.ai.mit.edu		Mon Feb 23 07:44:18 1987
*/

/* @@ a double at sign has been used to indicate spots where work is still
needed.

..!tektronix!videovax!stever has expressed interest in beta-testing
the program via a port to the amiga.

jeffl@wolf.UUCP was looking for a PC/AT-SCO Xenix version.  Might be willing
to test and port.

@@ Left to do:		Oueries welcome. -len tower <tower@prep.ai.mit.edu>

1 off bug: "diff{-new,} diff-NEWS*":  12,17c12,12 

lint it when it's all done.

Get the multiple exclusive option code to work better.  Check the GNU ps to
see how this is done there.

close dirs on file/dir dir/file case (so diff doesn't run out of file
desriptors.

close files on multiple file case.

use stdio if not a file or dir...ie:
handle length problem if stdin is used and it's a pipe/tty.
(believe present code fails for pipe/tty).

flags:
       -e finish
       -b, -i  (note array lookup technique in /u2/emacs/src/etags.c
BSD flags:
       -c make # of context lines variable
       -l pipe each file thru "pr".  for dir only: output other diffs at end.
       -r do sub-directories recusively
       -Sname (start dir cmp at name)
       -Dstring (output cpp format with #ifdef string causing file2)
       others?

(losing?) BSD behaviour:

   # diff inews inews.old
   Binary files inews and inews.old differ
   (Should at least, report first difference point as cmp does!)

   For compares of more than one (why not one?) include entire file in output
   when one file is non-existant (use a special check to prevent all the
   grinding).  Useful for diff/patch based method of update distribution
   (used e.g. in GNU Emacs).  Maybe cause this behaviuor by a flag?  (or
   turn it off with a flag?)

Hack a dir-sort library out of ls, and use it for ls, diff, et.al.
(or just exec ls to get a sorted list of each dir).

N filename args with dir-name as firts or last arg, with error checking
for a dir mid-list or more than one dir with more than two args

chunking program if file(s) is too long (>200K).
load 200K, when get to last complete line, trace chain back to first
gap (same as header break in output), output to there (handle special
case where first gap is beginning of chain). move input after gap to
beginning of buffer, fill buffer to 200K (or EOF),
and loop after resetting file and line number pointers, and
throwing out unused chain (easier to recalculate, than adjust).

line-end-char flag

Date: Fri, 22 Aug 86 19:03:08 EDT
From: rms (Richard M. Stallman)
To: tower
Subject: comments on diff-new

9. It is important for character codes over \200 octal to work.  Perhaps
they will work automatically.  If not, changing char to unsigned char
in some places is probably enough to make them work.

10. diff should be able to handle files that do not end in newlines.
So it should remember, for each file, whether the last line had
a newline at the end.  This information can be ignored in doing the
actual comparison, but some output routines must find a way to
indicate this in the output.

11. It is useful to include character positions as well as line numbers
when describing the position of each difference.  Perhaps a new
command line switch should control this feature.

-C (after wc -c ...)

If the entire file is read in at once, the character position is
straightforwardly found from the address of the line's contents.

12. There should be a command line switch that causes all the lines of
difference to be output with a total of 8 characters of stuff added at
the left.  This preserves the alignment of tabs and spaces within the
quoted text.

Use -8 as the flag?

13. There should be a feature for ignoring all blank lines of input.
Perhaps -b should control this, or perhaps another new switch.

Len: -B ?? how to handle line numbers? a second array for printing them?

14. There should be a feature to make each set of changes print
(in the **** line) the name of the function they occur in.
Assume that a line with an alphanumeric character in column 0
is a function definition beginning and that the function name
starts there.  This feature is present in the ITS file comparison
program and it is very much appreciated.

-F ?? (better one? (see etags switches?)

15. There should be a switch to ignore changes in case (not count
them as making lines differ). -i

16. There should be a switch to ignore changes in C comments
(not count them as making lines differ).  This cannot work perfectly;
it will be confused by things inside strings that look like comments,
and it may not handle multi-line comments, but it will still be useful.

Len: -E from cc?

 */

#include <stdio.h>
#include <strings.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <ndir.h>
#include <errno.h>
extern int errno;
extern int sys_nerr;
extern char *sys_errlist[];

#define	EOS		(0)
#define	FALSE		(0)	/* TRUE is any non-zero value  */

enum edit_cmd {
  INSERT,
  DELETE};

/* command line options */
int ws_same_flag = FALSE;	/* all strings of contiguous spaces and tabs
				   are not different  */
int print_context_flag = FALSE;	/* print out several lines of context before
				   and after each difference  */
int ed_script_flag = FALSE;	/* output script that the editor 'ed' can use
				   to produce file2 from file1 */
int ignore_case_flag = FALSE;	/* case differences are ignored  */
int print_file_same_flag = FALSE; /* report if files are the same  */
int print_english_headers_flag = FALSE;	/* difference headers and separators
					   in ouput are verbose english  */

int context_to_show = 3;	/* number of lines of context to show before
				   and after each change when
				   print_context_flag is set.  */

int complex_cmp = 0;		/* non-FALSE if not a simple compare
				   (e.g. ignoring case, or
				   condensing white space) */

char line_end_char = '\n';	/* character that ends a line  */

int exit_status = 0;		/* need this for multi-pair comparisions
				   0: no differences
				   1: differnces found
				   2: abnormal termination */

/* edit scripts are stored in linked lists  */
struct edit {
  struct edit *link;		/* previous edit command  */
  enum edit_cmd op;
  int lineA;			/* line number in file1  */
  int lineB;			/* line number in file2  */
};

struct line_def {
  char *text;
  int length;
} *A, *B;			/* alias for inf[0,1].L->line  */

struct line_array_def {
  struct line_def line[1];	/* The first element of a dynamically sized
				   array.  */
} *Ap, *Bp;			/* alias for inf[0,1].L  */

/* data on two files being compared  */
struct {
  int	desc;			/* file descriptor  */
  char	*name;			/* file name  */
  struct stat stat;		/* file status from fstat()  */
  int	argv_dir_p;		/* filename argument is a directory  */
  int	maxL;			/* number of lines in file  */
  struct line_array_def *L;	/* pointer to array of data on lines of file.
				   The array never has more than maxL
				   elements.
				   It is allocated in diff_2_files().  */
} inf[2];

int maxA, maxB;			/* "alias" for inf[0,1].maxL */

static
void
err_msg (text, arg)
     char *text, *arg;
{
  fprintf (stderr, "diff: %s ", text);
  fprintf (stderr, "%s\n", arg);
}

/* use when a system call returns non-zero status.  */
static
void
sys_err_msg (text)
     char *text;
{
  fprintf (stderr, "diff: %s: %s\n", text,
	   (errno > sys_nerr
	    ? "No Message in System Table sys_errlist[]"
	    : sys_errlist[errno]));
}

static
void
fatal (message)
     char *message;
{
  err_msg (message, "");
  exit (2);
}

static
char *
xmalloc (size)
     unsigned size;
{
  char *malloc();
  register char *value = malloc (size);

  if ((int) value == 0)
    fatal ("virtual memory exhausted");
  return value;
}

/* line_cmp - compare two lines of the files  */
static
int
line_cmp (s1, s2)
     struct line_def *s1, *s2;
{
  register char *t1, *t2;

  /* @@ need to have this consider line[].length for case where one file has
     @@ line_end_char and other does not.  */
  if ((s1->length != s2->length) && !complex_cmp)
    return (1);

  t1 = s1->text;
  t2 = s2->text;

  while (*t1++ == *t2++) {
    if ((*t1 == line_end_char) && (*t2 == line_end_char))
      return (0);
    if (complex_cmp)
      abort();			/* ws and ignore_case stuff */
  }
  return (1);
}

/* print_1_line - print the prefix and text of a single line  */
static
void
print_1_line (prefix, line)
     char *prefix;
     struct line_def *line;
{
  printf ("%s", prefix);
  fwrite (line->text, sizeof (char), line->length, stdout);
  if (line_end_char != '\n')
    printf ("\n");
}

char *context_prefix;
int context_start, context_end;

/* print_range - print a range of lines from the proper file.
   If context is wanted, print it, using globals variables context_start and
   context_end (both of which should have been checked for boundary
   conditions).  */
print_range (L, start, end, prefix)
     struct line_array_def *L;
     int start, end;	/* origin 1  */
     char *prefix;
{
  int i;

  if (print_context_flag) {
    for (i = context_start; i < start; i++)
      print_1_line (context_prefix, &(L->line[i-1]));
  }

  for (i = start; i <= end; i++)
    print_1_line (prefix, &(L->line[i-1]));

  if (print_context_flag) {
    for (i = end + 1; i <= context_end; i++)
      print_1_line (context_prefix, &(L->line[i-1]));
  }
}

/* print_script - print the edit script  */
static
void
print_script (start)
     struct edit *start;
{
  struct edit *ep, *ap, *bp;
  int	i, change;
  int	ins_startA, ins_endA, del_startA, del_endA;
  int	ins_startB, ins_endB, del_startB, del_endB;
  char	print_letter,		/* letter to use in a header line */
	*del_prefix,		/* prefix for a deleted line */
	*ins_prefix,		/* prefix for a inserted line */
	*sep_format;		/* printf format for a separator line */

  if (ed_script_flag)
    ep = start;
  else {
    /* reverse the pointers  */
    struct edit *behind, *ahead = start;

    ep = NULL;
    while (ahead != NULL) {
      behind = ep;
      ep = ahead;
      ahead = ahead -> link;
      ep->link = behind;	/* flip the pointer  */
    }
  }

  /* set up non-varying printf formats et. al. */
  if (print_english_headers_flag) {
    ins_prefix =
      del_prefix = "  ";
    sep_format = "To:\n";
  }
  else if (print_context_flag) {
    context_prefix = "  ";
    for (i = 0; i <= 1; i++) {
      extern char *ctime();
      
      printf ("%s %s\t%s", (i ? "---" : "***"), inf[i].name,
	      (inf[i].desc == 0 ? "\n" : ctime (&inf[i].stat.st_mtime)));
    }
  }
  else {
    ins_prefix = "> ";
    del_prefix = "< ";
    sep_format = "---\n";
  }

  /* print the edit commands  */
  while (ep != NULL) {
    if (ep->op == INSERT) {
      ins_startA = ep->lineA;
      ins_startB = ep->lineB;
      bp = ep;

      do {
	ins_endA =  ep->lineA;
	ins_endB =  ep->lineB;
	ep = ep->link;
      } while ((ep != NULL) && (ep->op == INSERT) && (ep->lineA == bp->lineA));

      if (print_english_headers_flag)
	printf ("Inserted after line %d:\n", ins_startA);
      else {
	if (print_context_flag) {
	  print_letter = 'i';	/* to setup ins_prefix ...  */
	  change = FALSE;	/* ... properly */
	  context_start = (( (ins_startA + 1 - context_to_show) < 1)
			   ? 1
			   : (ins_startA + 1 - context_to_show));
	  context_end = (((ins_endA + context_to_show) > maxA)
			 ? maxA
			 : (ins_endA + context_to_show));
	  printf ("***************\n*** %d,%d\n", context_start, context_end);
	  /* just need the context here.  */
	  for (i = context_start; i <= context_end; i++)
	    print_1_line (context_prefix, &A[i-1]);
	}
	else {
	  if (ins_startB != ins_endB)
	    printf ("%da%d,%d\n", ins_startA, ins_startB, ins_endB);
	  else
	    printf ("%da%d\n", ins_startA, ins_startB);
	}
      }
    }

    else {
      /* DELETE: look for a block of consecutive deleted lines.  */
      del_startA = ep->lineA;
      del_startB = ep->lineB;

      do {
	del_endA = ep->lineA;
	del_endB = ep->lineB;
	ap = ep;
	ep = ep->link;
      } while ((ep != NULL) && (ep->op == DELETE) &&
	       (ep->lineA == ap->lineA + 1));

      bp = ep;			/* b points to the command after
				   the last deletion.  */
      change =
	((ep != NULL) && (ep->op == INSERT) && (ep->lineA == ap->lineA));

      if (change) {
	/* count the inserted lines */
	ins_startA = ep->lineA;
	ins_startB = ep->lineB;

	do {
	  ins_endA = ep->lineA;
	  ins_endB = ep->lineB;
	  ep = ep->link;
	} while ((ep != NULL)&&(ep->op == INSERT) && (ep->lineA == bp->lineA));
 	if (print_english_headers_flag)
	  printf ("Changed ");
	else
	  print_letter = 'c';
      }
      else {
	if (print_english_headers_flag)
	  printf ("Deleted ");
	else
	  print_letter = 'd';
      }

      if (print_context_flag) {
	context_start = (del_startA - context_to_show);
	context_start = ((context_start < 1) ? 1 : context_start);
	context_end = (((del_endA + context_to_show) > maxA)
		       ? maxA
		       : (del_endA + context_to_show));
	printf ("***************\n*** %d,%d\n", context_start, context_end);
	del_prefix = (change ? "! " : "- " );
      }
      else if (del_startA == del_endA) {
	if (print_english_headers_flag)
	  printf ("line %d:\n", del_startA);
	else
	  printf ("%d%c%d\n", del_startA, print_letter,
		  del_startB + (change ? 1 : 0) );
      }
      else {
	if (print_english_headers_flag)
	  printf ("lines %d-%d:\n", del_startA, del_endA);
	else {
	  printf ("%d,%d%c%d,%d\n", del_startA, del_endA, print_letter,
		  del_startB + (change ? 1 : 0), del_endB + (change ? 1 : 0));
	}
      }

      /* Print the deleted lines.  */
      print_range (Ap, del_startA, del_endA, del_prefix);

      if ( !print_context_flag && !change )
	continue;

      if (!print_context_flag)
	printf ("%s", sep_format);
    }

    /* Print the inserted lines.  */
    if (print_context_flag) {
      if ( !change && !('i' == print_letter)) {
	ins_startB = del_startB + 1;
	ins_endB = del_endB;	
      }
      context_start = (((ins_startB - context_to_show) < 1)
		       ? 1
		       :(ins_startB - context_to_show));
      context_end = (((ins_endB + context_to_show) > maxB) /* @@??maxA?? */
		     ? maxB
		     : (ins_endB + context_to_show));
      printf ("\n--- %d,%d -----\n", context_start, context_end);
      ins_prefix = (change ? "! " : (print_letter != 'i' ? "  " : "+ "));
    }

    print_range (Bp, ins_startB, ins_endB, ins_prefix);
  }
}

/* make_script - does the diffs and prints out the results.
   Requires both files to be open, stat'ed, and input.  */
static
  
void
make_script ()		/* @@ better interface later? */
{
  int	max_d = maxA + maxB,	/* bound on size of edit script  */
	origin = maxA + 1,	/* origin diagonal */
	lower,			/* left-most diagonal under consideration  */
	upper,			/* right-most diagonal under consideration  */
	d,			/* current edit distance  */
	k,			/* current diagonal  */
	row,			/* row number  */
	col;			/* column number  */

  /* for each diagonal, two items are saved:  */
  int	*last_d			/* - the row containing the last d;  */
    = (int *) alloca ((maxA + maxB + 1) * sizeof (int));
  struct edit **script		/* - corresponding edit script.  */
    = (struct edit **) alloca ((maxA + maxB + 1) * sizeof (struct edit *));

  struct edit *new;		/* current instruction in an edit script  */

  /* Initialize: 0 entries in D indicate identical prefixes.  */
  for (row = 0;
       (row < maxA) && (row < maxB) && (0 == line_cmp (&A[row], &B[row]));
       ++row)   ;
  last_d [origin] = row;
  script [origin] = NULL;
  lower = (row == maxA) ? origin + 1 : origin - 1;
  upper = (row == maxB) ? origin - 1 : origin + 1;

  if (lower > upper) {		/* The files are identical.  */
    if (print_file_same_flag)
      printf ("No differences encountered. The files are identical.\n");
    /* @@ need dir_mode message */
    /* exit_status initialized to zero */
    return;
  }

  /* for each value of the edit distance  */
  for (d = 1; d <= max_d; ++d) {
    /* for each relevant diagonal  */
    for (k = lower; k <= upper; k += 2) {
      /* Get space for the edit instruction.  */
      new = (struct edit *) xmalloc ((unsigned) sizeof (struct edit));

      /* Find a d on diagonal k.  */
      if ((k == origin - d)
	  || ((k != origin + d) && (last_d[k+1] >= last_d[k-1]))) {
	  /* Moving down from the last d-1 on diagonal k+1 puts one
	     farther along diagonal k,
	     than does moving right from the last d-1 on diagonal k-1.  */
	  row = last_d[k+1] + 1;
	  new->link = script [k+1];
	  new->op = DELETE;
	}
      else {
	/* Move right from the last d-1 on diagonal k-1.  */
	row = last_d[k-1];
	new->link = script [k-1];
	new->op = INSERT;
      }

      /* Code common to both DELETE and INSERT.  */
      new->lineA = row;
      new->lineB = col = row + k - origin;
      script[k] = new;

      /* Slide down the diagonal.  */
      while ((row < maxA) && (col < maxB) && (0 == line_cmp (&A[row], &B[col]))) {
	++row;
	++col;
      }
      last_d[k] = row;

      if ((row == maxA) && (col == maxB)) {
	/* Hit south east corner; have the answer!  */
	print_script (script[k]);
	exit_status = 1;
	return;
      }

      if (row == maxA)		/* Hit last row; don't look to the left.  */
	lower = k + 2;

      if (col == maxB)		/* Last column; don't look to the right.  */
	upper = k - 2;
    }
    --lower;
    ++upper;
  }
  abort();			/* should NEVER get here */
}

/* diff_2_files - requires 2 input files open, and
   inf[1,2].desc,name,char initialized.  */
static
void
diff_2_files ()
{
  int fnum;			/* loop iterator  */

  /* check if the two named files are actually the same physical file.  */
  if ((inf[0].stat.st_ino == inf[1].stat.st_ino) &&
      (inf[0].stat.st_dev == inf[1].stat.st_dev) &&
      (print_file_same_flag)) {
    printf ("No differences encountered. The files are identical.\n");
    /* @@ need dir_mode message */
    return;
  }

  /* Read in file1 and file2, and initialise aliases.  */
  for (fnum = 0; fnum <= 1; fnum++) {
    int	lines = 0,		/* number of lines in file - origin 1  */
	Llength,		/* maximum number of lines L can hold  */
	fsize = inf[fnum].stat.st_size;	/* size of file  */
    char *fbuf = (char *) alloca (fsize + 1),
	 *i;			/* loop iterator  */

    if (read (inf[fnum].desc, fbuf, fsize) < 0)
      sys_err_msg (inf[fnum].name);

    Llength = 5;		/* @@ change to 1000 fro release */

    inf[fnum].L = (struct line_array_def *)
      xmalloc ((unsigned) ((unsigned) Llength *
			   (unsigned) sizeof (struct line_def)));
    inf[fnum].L->line[lines++].text = (char *) fbuf;

    for (i = fbuf; i <= fbuf + fsize; i++) {
      if ((*i == line_end_char) && ((i + 1) < (fbuf + fsize))) {
	if (lines >= Llength) {
	  int oldLlength = Llength;
	  struct line_array_def *oldL;

	  oldL = inf[fnum].L;

	  Llength = ((Llength + 1) * fsize) / (i - fbuf);
	  /* Assume average line length seen so far is average line length of
	     whole file, and fudge a little just in case.  */

	  inf[fnum].L = (struct line_array_def *)
	    xmalloc ((unsigned) ((unsigned) Llength *
				 (unsigned) sizeof (struct line_def)));
	  bcopy (oldL, inf[fnum].L, oldLlength * sizeof (struct line_def));
	  free ((char *) oldL);
	}
	inf[fnum].L->line[lines - 1].length =
	  (i + 1) - inf[fnum].L->line[lines - 1].text;
	inf[fnum].L->line[lines++].text = (char *) (i + 1);
      }
    }
    inf[fnum].L->line[lines - 1].length =
      inf[fnum].L->line[lines - 1].text - inf[fnum].L->line[lines - 2].text;

    close (inf[fnum].desc);
    inf[fnum].maxL = lines;
  }

  maxA = inf[0].maxL;
  maxB = inf[1].maxL;
  Ap = inf[0].L;
  Bp = inf[1].L;
  A = Ap->line;
  B = Bp->line;

  make_script ();

  free ((char *) Ap);		/* free the xmalloc'ed space */
  free ((char *) Bp);
}

main (argc, argv)
     int argc;
     char *argv[];
{
  char	 *ap;
  int i;

  while (argc > 1 && *(ap = argv[1]) == '-' && *++ap != EOS) {
    while (*ap != EOS) {
      switch (*ap++) {

      case 'b':
	ws_same_flag++;
	complex_cmp++;
	break;

      case 'c':
	print_context_flag++;
	break;

      case 'e':
	ed_script_flag++;
	break;

      case 'h':
	err_msg ("Warning, useless option.  ",
		 "GNU doesn't do half-hearted jobs.");
	break;

      case 'i':
	ignore_case_flag++;
	complex_cmp++;
	break;

      case 's':
	print_file_same_flag++;
	break;

      case 'v':
	print_english_headers_flag++;
	break;

      default:
	{ char opt_str[2];

	  opt_str[0] = ap[-1];
	  opt_str[1] = EOS;
	  err_msg ("Warning, bad option:", opt_str);
	  break;
	}
      }
    }
    argc--;
    argv++;
  }

  if (argc != 3)
    fatal ("requires two file names.  Usage: diff [-options] file1 file2");
  /* @@ need more complicated usage string for directory options??
     Note three liner at top of BSD documentation, and John Gilmore message
     in his public domain tar being used by GNU.  */

  if (print_context_flag && ed_script_flag) {
    /* @@ more flags are incompatible -f, -Dstring*/
    err_msg ("Warning, -c and -e are incompatible, -c supressed.\n", "");
    print_context_flag = FALSE;
  }

  argv++;

  for (i = 0; i <= 1; i++) {
    if (argv[i][0] == '-' && argv[i][1] == EOS) {
      inf[i].desc = 0;
      inf[i].name = "Standard Input";
    }
    else {
      char *filename = argv[i];

      if (0 > (inf[i].desc = open (filename, O_RDONLY, 0)))
	sys_err_msg (filename);
      else {
	extern int stat();

	inf[i].name = filename;
	if (fstat (inf[i].desc, &inf[i].stat) < 0)
	  sys_err_msg (inf[i].name);

	inf[i].argv_dir_p = (S_IFDIR == (inf[i].stat.st_mode & S_IFDIR));
      }
    }
  }

  if ((inf[0].desc < 0) || (inf[1].desc < 0))
    fatal ("can not continue");

  if (inf[0].argv_dir_p && inf[1].argv_dir_p) {
    /* diff all files in both directories */
    abort ();
  }
  else {
    if (inf[0].argv_dir_p || inf[1].argv_dir_p) {
      int dir_arg = (inf[0].argv_dir_p ? 0 : 1);
      int fnm_arg = (inf[0].argv_dir_p ? 1 : 0);
      char *filename = (char *) alloca (strlen (argv[dir_arg]) +
					strlen (argv[fnm_arg]) + 2);
      extern int stat();

      strcpy (filename, argv[dir_arg]);
      strcat (filename, "/");
      strcat (filename, argv[fnm_arg]);

      if (0 > (inf[dir_arg].desc = open (filename, O_RDONLY, 0))) {
	sys_err_msg (filename);
	fatal ("can not continue");
      }
      else {
	inf[dir_arg].name = filename;
	if (fstat (inf[dir_arg].desc, &inf[dir_arg].stat) < 0) {
	  sys_err_msg (inf[dir_arg].name);
	  fatal ("can not continue");
	}
      }
    }

    diff_2_files();
  }
  exit (exit_status);
}

/*
Local variables:
compile-command: "cc -o diff-new -g diff-new.c"
End:
*/
